
# Setup ------------------------------------------------------------------------
  # Options
  options(stringsAsFactors = F)
  # Packages
  library(data.table)
  library(magrittr)
  library(lubridate)
  library(stringr)
  # Directories
  dir_project <- "/Users/edwardarubin/Dropbox/Research/MyProjects/NaturalGas/"
  dir_csv     <- paste0(dir_project, "DataCsv/")
  dir_raw     <- paste0(dir_project, "DataRaw/")
  dir_r       <- paste0(dir_project, "DataR/")

# Load data --------------------------------------------------------------------
  # Load the Henry Hub spot price data
  hub_dt <- fread(paste0(dir_csv, "naturalGasPricesHenryHub.csv"))
  # Change names
  setnames(hub_dt, c("date", "spot_price"))
  # Format variables: convert date to Date format; convert prices from
  # millions of Btu to therms (divide by 10)
  hub_dt[, `:=`(
    date = date %>% mdy(),
    spot_price = spot_price / 10
    )]
  # Create dataset from min. date to max. date of HH spot price
  tmp <- data.table(date =
    seq.Date(from = min(hub_dt$date), to = max(hub_dt$date), by = "day"))
  # Merge the full date sequence with hub_dt; drop tmp
  hub_dt <- merge(x = tmp, y = hub_dt, by = "date", all = T)
  rm(tmp); gc()
  # Add lags of spot price (from 1-42 days)
  setorder(hub_dt, date)
  hub_dt[, paste0("spot_price_lag", 1:42) :=
    shift(x = spot_price, n = 1L:42L, fill = NA, type = "lag")]
  # Aggregate spot price summaries by week
  i1 <- seq(1, 42, 7)
  i2 <- seq(7, 42, 7)
  for(i in seq_along(i1)) {
    # Find the length of the sequence
    N <- length(i1[i]:i2[i])
    # Take subset
    tmp_hub <- hub_dt[, paste0("spot_price_lag", i1[i]:i2[i]), with = F]
    # Add count of non-missing data
    set(x = tmp_hub, j = "n", value = tmp_hub %>% is.na() %>% rowSums())
    tmp_hub[, n := N - n]
    # Take average
    tmp_avg <-
      (tmp_hub[, -"n", with = F] %>% rowSums(na.rm = T)) / tmp_hub$n
    # Add average price to actual dataset
    set(hub_dt, j = paste0("spot_price_lag_", i1[i], "_", i2[i]),
      value = tmp_avg)
    # Drop the individual days' lags from the datasets
    hub_dt[, paste0("spot_price_lag", i1[i]:i2[i]) := NULL]
  }

# Save -------------------------------------------------------------------------
  # Save as CSV
  write.csv(x = hub_dt,
    file = paste0(dir_csv, "dailyHenryHubPricesWithLags.csv"),
    row.names = F)
